library(knitr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
library(here)
## here() starts at C:/Users/la_pa/OneDrive/Documentos/R/Portfolio_DS241
library(sf)
## Warning: package 'sf' was built under R version 4.3.2
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(tmap)
## Warning: package 'tmap' was built under R version 4.3.2
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.3.2
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.3.2
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

Import Dataset

neigh = st_read(here("data_raw","DC_Health_Planning_Neighborhoods.geojson")) %>% clean_names()
## Reading layer `DC_Health_Planning_Neighborhoods' from data source 
##   `C:\Users\la_pa\OneDrive\Documentos\R\Portfolio_DS241\data_raw\DC_Health_Planning_Neighborhoods.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 51 features and 8 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -77.11976 ymin: 38.79165 xmax: -76.9094 ymax: 38.99556
## Geodetic CRS:  WGS 84
plot(neigh)

Investigating Joining Spatial and Non-Spatial Data

dc_c = read_csv(here("data_raw","DC_COVID-19_Total_Positive_Cases_by_Neighborhood.csv")) %>% clean_names()
## Rows: 26372 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): DATE_REPORTED, NEIGHBORHOOD
## dbl (2): OBJECTID, TOTAL_POSITIVES
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_cases=dc_c %>%
  filter(as_date(date_reported) == "2021-11-17") %>% 
  separate(neighborhood,into=c("code","name"),sep = ":") %>%
  mutate(code=case_when(code=="N35" ~"N0",
                        TRUE ~ code)) %>%
  select(-date_reported)
neigh2 = left_join(neigh, df_cases, by=c("code"))
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(neigh2) +tm_polygons("total_positives", alpha = .5)
df_census = get_acs(geography = "tract",
                    variables = c("median_inc"="B06011_001",
                                  "pop" = "B01003_001E",
                                  "pop_black" = "B02009_001"),
                    state = "DC", geometry = TRUE, year = 2021)
## Getting data from the 2017-2021 5-year ACS
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
class(df_census)
## [1] "sf"         "data.frame"
plot(df_census)

dc_cases_filtered <- dc_c %>%
  filter(as.Date(date_reported) == as.Date("2021-11-17")) %>%
  separate(neighborhood, into = c("code", "name"), sep = ":") %>%
  mutate(code = case_when(code == "N35" ~ "N0", TRUE ~ code)) %>%
  select(-date_reported)

df_combined <- left_join(dc_cases_filtered, df_census, by = c("code" = "GEOID"))

neigh_with_crime <- left_join(neigh, df_combined, by = "code")

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(neigh_with_crime) +
  tm_bubbles(size = "total_positives", col = "total_positives", alpha = 0.7) +
  tm_borders() +
  tm_layout(title = "Total Positives and Demographic Data by Neighborhood")
## Legend for symbol sizes not available in view mode.
tm_shape(neigh_with_crime) +
  tm_bubbles(size = "total_positives", col = "total_positives", alpha = 0.7) +
  tm_borders() +
  tm_shape(df_census) +
  tm_fill("estimate", style = "cont") +  # Use style "cont" for continuous variables
  tm_borders() +
  tm_layout(title = "Estimates: District of Columbia 2021")
## Legend for symbol sizes not available in view mode.
dc_c$date_reported <- as.Date(dc_c$date_reported)

neighborhood_totals <- dc_c %>%
  group_by(neighborhood) %>%
  summarize(total_positives = sum(total_positives, na.rm = TRUE)) %>%
  filter(!is.na(neighborhood) & !grepl("^unknown$", neighborhood, ignore.case = TRUE))  

sorted_neighborhoods <- neighborhood_totals %>% arrange(total_positives)

cat("Neighborhoods with the Least Amount of Total Positives:\n")
## Neighborhoods with the Least Amount of Total Positives:
least_5 <- head(sorted_neighborhoods, 5)
for (i in 1:5) {
  cat("\"", least_5$neighborhood[i], "\": ", least_5$total_positives[i], "\n", sep = "")
}
## "N24: GWU/ FOGGY BOTTOM": 189
## "N24: GWU": 13806
## "N35: NATIONAL MALL": 23835
## "N36: NAVAL STATION & AIR FORCE": 35027
## "N15: DC MEDICAL CENTER": 62004
cat("\n")
cat("Neighborhoods with the Most Amount of Total Positives:\n")
## Neighborhoods with the Most Amount of Total Positives:
most_5 <- tail(sorted_neighborhoods, 5)
for (i in 1:5) {
  cat("\"", most_5$neighborhood[i], "\": ", most_5$total_positives[i], "\n", sep = "")
}
## "N43: SW/WATERFRONT": 619367
## "N12: CHINATOWN": 619941
## "N7: BRIGHTWOOD": 632259
## "N1: 16th ST HEIGHTS": 693368
## "N13: COLUMBIA HEIGHTS": 891102